 
 
 capture cd "D:\Dropbox\book_welfare\replication"

 * Everything read from E: is raw Bookstat data, which cannot be shared.

 * Cut each Bookstat metadata file down to the five fields this script uses
 * and store the result next to the original with a _small suffix.
 foreach fmt in ebook print {
 	use E:\data\intermediate\clean_meta_`fmt'.dta, clear
 	keep asin author title pdate genre
 	save E:\data\intermediate\clean_meta_`fmt'_small.dta, replace
 }

 
 
 * Build the title spine: one row per ASIN, carrying the first-name-based
 * male share (mshare1-mshare3) of up to three listed authors.
 use E:\data\intermediate\clean_meta_ebook_small.dta, clear
 gen ebook = 1
 append using E:\data\intermediate\clean_meta_print_small.dta
 replace ebook = 0 if ebook == .
 replace author = substr(author, 1, 100)

 * One variable per listed author (author1, author2, ...).
 split author, parse(", " " & ")
 local n_pieces = r(nvars)

 * Only the first three authors are used. Drop the surplus pieces by count
 * rather than by a hard-coded name range, so this works however many pieces
 * split happened to create. The loop runs zero times when there are three
 * or fewer pieces.
 forvalues j = 4/`n_pieces' {
 	drop author`j'
 }

 * First word of each author string is treated as the first name:
 * author11, author21 and author31 are the first tokens of authors 1 to 3.
 forvalues k = 1/3 {
 	replace author`k' = substr(author`k', 1, 15)
 	split author`k', parse(" ")
 }

 gen pyear = year(pdate)

 * ebook is carried along only to make the de-duplication below deterministic.
 keep author title genre pyear author11 author21 author31 asin ebook

 * Look up each first name in the name-gender file. Names found only in the
 * lookup file are not kept.
 forvalues k = 1/3 {
 	gen name = upper(author`k'1)
 	merge m:1 name using data\name_gender_wipo.dta, keep(master match) nogenerate
 	rename mshare mshare`k'
 	drop name
 }

 * Keep one record per ASIN. The within-ASIN order is fixed explicitly so the
 * surviving row does not depend on an arbitrary sort order: the print record
 * (ebook==0) is preferred, then author, genre and pyear break ties. Rows that
 * still tie can differ only in title.
 * NOTE(review): preferring the print record is a choice, confirm it is acceptable.
 bysort asin (ebook author genre pyear): keep if _n == 1
 drop ebook
 save E:\data\intermediate\spine_gender_3authors.dta, replace




* Attach hand-coded author gender and cleaned genre labels to
* spine_gender_3authors.dta.

* Hand-coded gender sheet: keep only rows with a definite 0/1 code.
* It is imported once, before the spine is loaded, so no preserve/restore
* round trip is needed.
import excel data\mshare_hand_bookstat.xlsx, sheet("Sheet1") firstrow clear
keep author male_hand
keep if male_hand == 0 | male_hand == 1
tempfile hand
save `hand'

use E:\data\intermediate\spine_gender_3authors.dta, clear

* Authors that appear only in the hand-coded sheet are not added to the spine.
merge m:1 author using `hand', keep(master match) nogenerate

* Replace the raw genre with the cleaned label genre1 from the lookup file.
* Genres that appear only in the lookup file are not kept: they would add
* rows without an asin, and the spine is later used as the m:1 side of
* merges on asin.
merge m:1 genre using data\genre_cleaner.dta, keep(master match) nogenerate
drop genre
rename genre1 genre

keep author title pyear mshare* genre male_hand asin


******************
* merge pseudonyms
******************
* capture, as at the top of the script, so a machine without this exact
* folder does not abort here; relative data\ paths then resolve against the
* current working directory.
capture cd "D:\Dropbox\book_welfare\replication"

* Look the author string up as a pen name. Pen names found only in the
* lookup file are not kept.
gen penname = author
merge m:1 penname using data\pennames_from_trussel.dta, keep(master match) nogenerate
* merge m:1 penname using data\pennames_from_trussel_new.dta

* Final male share, lowest to highest priority:
*   1. mean of the available first-name shares mshare1-mshare3
*   2. rmshare from the pen-name file
*      NOTE(review): presumably the share for the real author, confirm
*   3. the hand-coded value male_hand
egen mshare = rowmean(mshare?)
replace mshare = rmshare if rmshare != .
replace mshare = male_hand if male_hand != .
save E:\data\intermediate\spine_gender_3authors_pseudonym.dta, replace


****************************************
* make "supply" file
****************************************
* Title counts per publication year and genre:
*   total   number of titles in the cell
*   N_name  titles with a non-missing male share
*   N_male  sum of the male share over those titles

use E:\data\intermediate\spine_gender_3authors_pseudonym.dta, clear
gen one = 1
collapse (count) total = one N_name = mshare (sum) N_male = mshare, by(pyear genre)

save data\bookstat_gender_pyear_genre_asin.dta, replace

****************************************
* make "demand" file
****************************************
* Sales by sales year, publication year and genre:
*   q       total quantity
*   q_name  quantity of titles with a non-missing male share
*   q_male  quantity weighted by male share

use data\print_annual_2018.dta, clear
forvalues yr = 2019/2021 {
	append using data\print_annual_`yr'.dta
}

* NOTE(review): e-book files run through 2022 while print stops at 2021.
* Confirm the extra e-book year is intended.
forvalues yr = 2018/2022 {
	append using data\ebook_annual_`yr'.dta
}

* Keep matched rows only. Titles that exist only in the spine have no sales
* and would otherwise create cells with a missing sales year. Sales rows
* without a spine record have no pyear and are excluded by the pyear filter
* below in any case.
merge m:1 asin using E:\data\intermediate\spine_gender_3authors_pseudonym.dta, keep(match) nogenerate

gen q_name = q * (mshare != .)
gen q_male = q * mshare
collapse (sum) q q_name q_male, by(year pyear genre)

keep if pyear >= 1900 & pyear <= 2021

save data\bookstat_sales_gender_pyear_genre_asin.dta, replace


****************************************
* create 2021 file for welfare analysis
****************************************
* capture, as at the top of the script, so a machine without this exact
* folder does not abort here.
capture cd "D:\Dropbox\book_welfare\replication"

* Author-level lookup: total quantity per author in each sales year
* 2018-2021, one row per author, columns q2018 to q2021.
use data\print_annual_2018.dta, clear
append using data\ebook_annual_2018.dta
forvalues yr = 2019/2021 {
	append using data\print_annual_`yr'.dta
	append using data\ebook_annual_`yr'.dta
}

* Only q is needed here, so price is not carried along.
collapse (sum) q, by(year asin)

* Keep matched rows only. Sales of titles absent from the spine carry no
* author and would otherwise be pooled under a blank author name.
merge m:1 asin using E:\data\intermediate\spine_gender_3authors_pseudonym.dta, keep(match) nogenerate
collapse (sum) q, by(author year)
drop if year == .
reshape wide q, i(author) j(year)

* An author with no sales in a given year gets 0 rather than missing.
mvencode q*, mv(0) override
save data\bs_q_by_author.dta, replace

* Title-level 2021 file for the welfare analysis: 2021 quantity and mean
* price per title, gender measures from the spine, and the author-level
* sales history from bs_q_by_author.dta.
use data\print_annual_2021.dta, clear
append using data\ebook_annual_2021.dta

* The former merge with asin_index.dta is removed. It dropped using-only rows
* and this collapse discarded every variable it added, so it had no effect on
* the output and only required a file that this script does not create.
collapse (sum) q (mean) p, by(year asin)

* Keep only titles present in the spine.
merge m:1 asin using E:\data\intermediate\spine_gender_3authors_pseudonym.dta, keep(match) nogenerate

* Numeric author id, assigned before the filters below are applied.
egen authorno = group(author)
drop title

gen q_name = q * (mshare != .)
gen q_male = q * mshare

keep if pyear >= 1900 & pyear <= 2021

* Attach author-level sales by year and keep only authors found there.
merge m:1 author using data\bs_q_by_author.dta, keep(match) nogenerate
drop author
save data\bookstat_2021_welfare_asin.dta, replace



*********************
* create decile files
*********************
* Sales by sales year, publication year, genre and within-cell sales decile.
* decile runs from 0 (best-selling tenth of titles in the cell) to 9.

use data\print_annual_2018.dta, clear
forvalues yr = 2019/2021 {
	append using data\print_annual_`yr'.dta
}

* NOTE(review): e-book files run through 2022 while print stops at 2021.
* Confirm the extra e-book year is intended.
forvalues yr = 2018/2022 {
	append using data\ebook_annual_`yr'.dta
}

* One row per title and sales year. Price is not used below.
collapse (sum) q, by(year asin)

* Keep matched rows only. Titles that exist only in the spine have no sales
* and would otherwise form cells with a missing sales year. Sales rows
* without a spine record have no pyear and are excluded by the pyear filter
* below in any case.
merge m:1 asin using E:\data\intermediate\spine_gender_3authors_pseudonym.dta, keep(match) nogenerate

* gen pyear = year(pdate)

* Rank titles within each cell from highest q (r = 1) to lowest. The order
* inside the cell is stated in the by-prefix itself, so the rank does not
* depend on an earlier sort staying intact, and asin breaks ties in q so the
* result is the same on every run.
bysort year pyear genre (q asin): gen r = _N - _n + 1
by year pyear genre: gen R = _N
gen pct = r/R
drop r R

* The small offset keeps exact boundaries in the lower decile,
* for example pct = 1 gives decile 9 rather than 10.
gen decile = int(10*pct - .00001)

gen q_name = q * (mshare != .)
gen q_male = q * mshare

* n is the number of titles in each decile cell.
collapse (sum) q q_name q_male (count) n = q, by(year pyear genre decile)

keep if pyear >= 1900 & pyear <= 2021

save data\bookstat_sales_gender_pyear_genre_decile_asin.dta, replace